ENV CHECK (embeddings + multiprocessing)¶

In [1]:
# figure settings
import scanpy as sc
import anndata as ad
import os
sc.set_figure_params(scanpy=True, dpi=80, dpi_save=250,
                     frameon=True, vector_friendly=True,
                     color_map="YlGnBu", format='pdf', transparent=False,
                     ipython_format='png2x')

DATADIR = "/storage/groups/ce01/workspace/mobisc_anna/processed_datasets/share_seq_brain"
#%ls $DATADIR
files = [os.path.join(DATADIR, 'old_anndata_brain_share_seq_geneactivity_rna_merged.h5ad')]

int_files = ['liger_jnmf_integration.h5ad', 
             'scJoint_integration.h5ad', 
             'seurat_cca_integration.h5ad', 
             'seurat_rpca_integration.h5ad']

DATADIR = "/storage/groups/ce01/workspace/mobisc_anna/integrated_outputs/share_seq_brain_shared_genes_full"
#%ls $DATADIR
for i in int_files:
    files.append(os.path.join(DATADIR, i))
    
data = ad.read(files[0])
sc.pp.pca(data)
sc.pp.neighbors(data)
sc.tl.umap(data)
sc.pl.umap(data, color='broad_annotations')

from multiprocessing import Process, Queue from time import sleep

def my_func(n, l): if n % 2 == 0: sleep(5) for i in l: print(f'Process {n}: {i}\n')

processes = [] my_list = range(1, 5) for i in range(8): p = Process(target=my_func, args=(i, my_list)) p.start() processes.append(p)

for p in processes: p.join()

FURTHER ANALYSIS¶

In [2]:
# Load packages 
import scanpy as sc
import anndata as ad
import os
import pandas as pd
import matplotlib.pyplot as plt
import scmoib
import numpy as np
import tqdm
During startup - Warning messages:
1: Setting LC_TIME failed, using "C" 
2: Setting LC_MONETARY failed, using "C" 
3: Setting LC_PAPER failed, using "C" 
4: Setting LC_MEASUREMENT failed, using "C" 
In [3]:
# figure settings
sc.set_figure_params(scanpy=True, dpi=80, dpi_save=250,
                     frameon=True, vector_friendly=True,
                     color_map="YlGnBu", format='pdf', transparent=False,
                     ipython_format='png2x')
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scanpy/_settings.py:447: DeprecationWarning:

`set_matplotlib_formats` is deprecated since IPython 7.23, directly use `matplotlib_inline.backend_inline.set_matplotlib_formats()`

Load the processed data¶

DATADIR = "/storage/groups/ce01/workspace/mobisc_anna/processed_datasets/share_seq_brain"

%ls $DATADIR¶

files = [os.path.join(DATADIR, 'old_anndata_brain_share_seq_geneactivity_rna_merged.h5ad')]

int_files = ['liger_jnmf_integration.h5ad', 'scJoint_integration.h5ad', 'seurat_cca_integration.h5ad', 'seurat_rpca_integration.h5ad']

DATADIR = "/storage/groups/ce01/workspace/mobisc_anna/integrated_outputs/share_seq_brain_shared_genes_full"

%ls $DATADIR¶

for i in int_files: files.append(os.path.join(DATADIR, i))

ids = ['unintegrated', 'liger', 'scjoint', 'seurat_cca', 'seurat_rpca'] modes = ['raw', 'comps', 'comps', 'count', 'count'] col = ['broad_annotations'] * 5 reps = [None, 'X_inmf_embedding', 'X_scjoint_embedding', None, None] my_obj = scmoib.tl.MetricsCalculator() adata_list = []

In [4]:
DATADIR = "/mnt/znas/icb_zstore01/groups/ce01/workspace/mobisc_anna/processed_datasets/PBMC_10X/"
#%ls $DATADIR
files = [os.path.join(DATADIR, 'old_anndata_pbmc_10x_processed_merged_rna_first_full_feature.h5ad')]

int_files = ['liger/PBMC_10X_jNMF.h5ad', 
             'seurat_cca/pbmc_10x_seurat_cca_rna_first.h5ad', 
             'bindsc/pbmc_10x_bindsc_rna_first_full_feature.h5ad', 
             'scalex/RNA-ATAC_pbmc_10x_scalex_processed_full_feature.h5ad', 
             'seurat_rpca/pbmc_10x_seurat_rpca_rna_first.h5ad']

DATADIR = "/storage/groups/ce01/workspace/mobisc_anna/integrated_outputs/"
%ls $DATADIR
for i in int_files:
    files.append(os.path.join(DATADIR, i))
    
ids = ['unintegrated', 'liger', 'seurat_cca', 'bindsc', 'scalex', 'seurat_rpca']
modes = ['raw', 'comps', 'count', 'count', 'graph', 'count']
col = ['broad cell types', 'broad.cell.types'] + ['broad cell types'] * 4
reps = [None, 'X_iNMF', None, None, None, None]
my_obj = scmoib.tl.MetricsCalculator()
adata_list = []
PBMC_10x/  pbmc_jnmf.h5ad    scalex/       share_seq_brain_shared_genes_full/
bindsc/    pbmc_uinmf.h5ad*  seurat_cca/
liger/     scJoint/          seurat_rpca/
In [5]:
%%time


for i in tqdm.tqdm(files):
    pbmc = ad.read(i)
    adata_id = ids[files.index(i)]
    clust_id = 'louvain'
    ct_id = col[files.index(i)]
    bc_list = list(pbmc.obs.index)
    bc_list1 = bc_list[:len(bc_list) // 2]
    bc_list2 = bc_list[len(bc_list) // 2:]
    mode = modes[files.index(i)]
    rep = reps[files.index(i)]
    my_obj.check_anndata(pbmc, ct_id, mode=mode, use_rep=rep)
    print('-' * 10)
    my_obj.node_metrics(pbmc, adata_id, bc_list1, bc_list2, ct_id, n_jobs=8)
    my_obj.spec_dist(pbmc, adata_id=adata_id, n_metr=10)
    try:
        my_obj._accuracy_paired_omics(pbmc, adata_id, bc_list1, bc_list2, 'omic_batch', clust_id)
        my_obj._accuracy_paired_omics_per_cell_type(pbmc, adata_id, bc_list1, bc_list2, 
                                                    'omic_batch', clust_id, ct_id)
        my_obj.ami(pbmc, adata_id, ct_id, clust_id)
        my_obj.ari(pbmc, adata_id, ct_id, clust_id)
        my_obj.nmi(pbmc, adata_id, ct_id, clust_id)
        my_obj.homogeneity(pbmc, adata_id, ct_id, clust_id)
    
    except ValueError as err:
        print(f"Trouble with {adata_id}")
        print(err.args[0])

    my_obj._graph_connectivity(pbmc, adata_id, ct_id)
    try:
        my_obj.silhouette(pbmc, adata_id, batch_key='omic_batch', embed='X_umap')
        my_obj.silhouette_batch(pbmc, adata_id, batch_key='omic_batch', cell_label=ct_id, embed='X_umap')
    except:
        print('Silhouette failed')
    my_obj._average_dist_between_matching_barcodes(pbmc, adata_id, bc_list1, bc_list2, metric='cosine')
    my_obj.isolated_labels_score(pbmc, adata_id, ct_id, 'omic_batch', 'X_umap')
    adata_list.append(pbmc)
my_obj.get_df()
  0%|                                                     | 0/6 [00:00<?, ?it/s]
Running pre-flight check
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/sklearn/manifold/_t_sne.py:783: FutureWarning:

The default initialization in TSNE will change from 'random' to 'pca' in 1.2.

Clustering...
step 0
got 23 at resolution 1.5
step 1
got 15 at resolution 0.75
step 2
got 11 at resolution 0.375
step 3
got 7 at resolution 0.1875
step 4
got 11 at resolution 0.28125
step 5
got 10 at resolution 0.234375
----------
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/utils/dijkstra.py:137: UserWarning:

Warning! You are running the high load graph method.
Test run information:
CPU: 8 cores
Wall time: 6 minutes
RAM: 4 GB(?)
Number of barcode pairs: 11k

/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/_node_metrics.py:51: RuntimeWarning:

Mean of empty slice.

/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/numpy/core/_methods.py:189: RuntimeWarning:

invalid value encountered in double_scalars

/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scib-1.0.0-py3.7.egg/scib/metrics/isolated_labels.py:57: DeprecationWarning:

The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.

 17%|██████▊                                  | 1/6 [22:51<1:54:18, 1371.60s/it]/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/compat/__init__.py:183: FutureWarning:

Moving element from .uns['neighbors']['distances'] to .obsp['distances'].

This is where adjacency matrices should go now.

Running pre-flight check
WARNING: .obsp["connectivities"] have not been computed using umap
Clustering...
step 0
got 36 at resolution 1.5
step 1
got 26 at resolution 0.75
step 2
got 18 at resolution 0.375
step 3
got 12 at resolution 0.1875
step 4
got 9 at resolution 0.09375
step 5
got 11 at resolution 0.140625
step 6
got 10 at resolution 0.1171875
----------
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/utils/dijkstra.py:137: UserWarning:

Warning! You are running the high load graph method.
Test run information:
CPU: 8 cores
Wall time: 6 minutes
RAM: 4 GB(?)
Number of barcode pairs: 11k

/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scib-1.0.0-py3.7.egg/scib/metrics/isolated_labels.py:57: DeprecationWarning:

The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.

 33%|██████████████▋                             | 2/6 [31:03<56:57, 854.29s/it]
Running pre-flight check
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/sklearn/manifold/_t_sne.py:783: FutureWarning:

The default initialization in TSNE will change from 'random' to 'pca' in 1.2.

Clustering...
step 0
got 21 at resolution 1.5
step 1
got 13 at resolution 0.75
step 2
got 9 at resolution 0.375
step 3
got 12 at resolution 0.5625
step 4
got 11 at resolution 0.46875
step 5
got 10 at resolution 0.421875
----------
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/utils/dijkstra.py:137: UserWarning:

Warning! You are running the high load graph method.
Test run information:
CPU: 8 cores
Wall time: 6 minutes
RAM: 4 GB(?)
Number of barcode pairs: 11k

/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scib-1.0.0-py3.7.egg/scib/metrics/isolated_labels.py:57: DeprecationWarning:

The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.

 50%|██████████████████████                      | 3/6 [40:21<35:56, 718.78s/it]/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/h5py/_hl/dataset.py:541: DeprecationWarning:

Passing None into shape arguments as an alias for () is deprecated.

Running pre-flight check
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/sklearn/manifold/_t_sne.py:783: FutureWarning:

The default initialization in TSNE will change from 'random' to 'pca' in 1.2.

Clustering...
step 0
got 21 at resolution 1.5
step 1
got 16 at resolution 0.75
step 2
got 11 at resolution 0.375
step 3
got 8 at resolution 0.1875
step 4
got 9 at resolution 0.28125
step 5
got 11 at resolution 0.328125
step 6
got 9 at resolution 0.3046875
step 7
got 11 at resolution 0.31640625
step 8
got 9 at resolution 0.310546875
step 9
got 10 at resolution 0.3134765625
----------
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/utils/dijkstra.py:137: UserWarning:

Warning! You are running the high load graph method.
Test run information:
CPU: 8 cores
Wall time: 6 minutes
RAM: 4 GB(?)
Number of barcode pairs: 11k

/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scib-1.0.0-py3.7.egg/scib/metrics/isolated_labels.py:57: DeprecationWarning:

The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.

 67%|█████████████████████████████▎              | 4/6 [52:16<23:54, 717.30s/it]/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/h5py/_hl/dataset.py:541: DeprecationWarning:

Passing None into shape arguments as an alias for () is deprecated.

Running pre-flight check
Clustering...
step 0
got 18 at resolution 1.5
step 1
got 13 at resolution 0.75
step 2
got 8 at resolution 0.375
step 3
got 11 at resolution 0.5625
step 4
got 10 at resolution 0.46875
----------
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/utils/dijkstra.py:137: UserWarning:

Warning! You are running the high load graph method.
Test run information:
CPU: 8 cores
Wall time: 6 minutes
RAM: 4 GB(?)
Number of barcode pairs: 11k

KeysView(AxisArrays with keys: X_umap, latent)
Silhouette failed
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scib-1.0.0-py3.7.egg/scib/metrics/isolated_labels.py:57: DeprecationWarning:

The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.

 83%|███████████████████████████████████       | 5/6 [1:05:11<12:18, 738.33s/it]
Running pre-flight check
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/sklearn/manifold/_t_sne.py:783: FutureWarning:

The default initialization in TSNE will change from 'random' to 'pca' in 1.2.

Clustering...
step 0
got 21 at resolution 1.5
step 1
got 16 at resolution 0.75
step 2
got 11 at resolution 0.375
step 3
got 8 at resolution 0.1875
step 4
got 9 at resolution 0.28125
step 5
got 10 at resolution 0.328125
----------
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scmoib-0.0.1+7.gfe906bc.dirty-py3.7.egg/scmoib/tools/metrics/utils/dijkstra.py:137: UserWarning:

Warning! You are running the high load graph method.
Test run information:
CPU: 8 cores
Wall time: 6 minutes
RAM: 4 GB(?)
Number of barcode pairs: 11k

/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/scib-1.0.0-py3.7.egg/scib/metrics/isolated_labels.py:57: DeprecationWarning:

The default dtype for empty Series will be 'object' instead of 'float64' in a future version. Specify a dtype explicitly to silence this warning.

100%|██████████████████████████████████████████| 6/6 [1:15:55<00:00, 759.24s/it]
CPU times: user 3h 30min 13s, sys: 1h 20min 46s, total: 4h 51min
Wall time: 1h 15min 55s

Out[5]:
conn_ratio spec_dist_10 accuracy AMI ARI NMI homogeneity graph connectivity broad cell types sil_global sil_clus pairwise_distance il_sil il_clus graph connectivity broad.cell.types
unintegrated 0.0 NaN 0.000000 0.601217 0.421081 0.601592 0.615105 0.499798 0.723077 0.349730 0.761411 NaN NaN NaN
liger 1.0 0.629795 0.322143 0.532695 0.448401 0.533130 0.551172 NaN 0.652315 0.466547 0.861671 NaN NaN 0.633627
seurat_cca 1.0 0.938760 0.849163 0.729525 0.646358 0.729793 0.710242 0.979320 0.507945 0.720455 0.837083 NaN NaN NaN
bindsc 1.0 0.398853 0.010280 0.629792 0.457215 0.630131 0.661708 0.936445 0.524120 0.704856 0.687280 NaN NaN NaN
scalex 1.0 0.875036 0.388737 0.666833 0.482170 0.667135 0.707853 0.988257 NaN NaN 0.705320 NaN NaN NaN
seurat_rpca 1.0 0.836273 0.701055 0.635951 0.578401 0.636312 0.621872 0.966974 0.520412 0.771678 0.890710 NaN NaN NaN
In [6]:
scmoib.pl.cumulative_node_group(adata_list[1:], ids[1:])
In [7]:
df = my_obj.get_df()
df
Out[7]:
conn_ratio spec_dist_10 accuracy AMI ARI NMI homogeneity graph connectivity broad cell types sil_global sil_clus pairwise_distance il_sil il_clus graph connectivity broad.cell.types
unintegrated 0.0 NaN 0.000000 0.601217 0.421081 0.601592 0.615105 0.499798 0.723077 0.349730 0.761411 NaN NaN NaN
liger 1.0 0.629795 0.322143 0.532695 0.448401 0.533130 0.551172 NaN 0.652315 0.466547 0.861671 NaN NaN 0.633627
seurat_cca 1.0 0.938760 0.849163 0.729525 0.646358 0.729793 0.710242 0.979320 0.507945 0.720455 0.837083 NaN NaN NaN
bindsc 1.0 0.398853 0.010280 0.629792 0.457215 0.630131 0.661708 0.936445 0.524120 0.704856 0.687280 NaN NaN NaN
scalex 1.0 0.875036 0.388737 0.666833 0.482170 0.667135 0.707853 0.988257 NaN NaN 0.705320 NaN NaN NaN
seurat_rpca 1.0 0.836273 0.701055 0.635951 0.578401 0.636312 0.621872 0.966974 0.520412 0.771678 0.890710 NaN NaN NaN
In [8]:
df.drop(columns=['conn_ratio', 'il_clus', 'il_sil'], inplace=True)
In [9]:
df
Out[9]:
spec_dist_10 accuracy AMI ARI NMI homogeneity graph connectivity broad cell types sil_global sil_clus pairwise_distance graph connectivity broad.cell.types
unintegrated NaN 0.000000 0.601217 0.421081 0.601592 0.615105 0.499798 0.723077 0.349730 0.761411 NaN
liger 0.629795 0.322143 0.532695 0.448401 0.533130 0.551172 NaN 0.652315 0.466547 0.861671 0.633627
seurat_cca 0.938760 0.849163 0.729525 0.646358 0.729793 0.710242 0.979320 0.507945 0.720455 0.837083 NaN
bindsc 0.398853 0.010280 0.629792 0.457215 0.630131 0.661708 0.936445 0.524120 0.704856 0.687280 NaN
scalex 0.875036 0.388737 0.666833 0.482170 0.667135 0.707853 0.988257 NaN NaN 0.705320 NaN
seurat_rpca 0.836273 0.701055 0.635951 0.578401 0.636312 0.621872 0.966974 0.520412 0.771678 0.890710 NaN
In [10]:
df.sum(axis=1)
Out[10]:
unintegrated    4.573011
liger           5.631496
seurat_cca      7.648644
bindsc          5.640680
scalex          5.481340
seurat_rpca     7.159638
dtype: float64
In [11]:
for i,j in zip(adata_list, ids):
    print(j)
    try:
        sc.pl.umap(i, color=['broad cell types', 'omic_batch'], wspace=0.7)
    except:
        sc.pl.umap(i, color=['broad.cell.types', 'omic_batch'], wspace=0.7)
unintegrated
liger
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/_core/anndata.py:1228: FutureWarning:

The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object.

... storing 'orig.ident' as categorical
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/_core/anndata.py:1228: FutureWarning:

The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object.

... storing 'filter_barcodes' as categorical
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/_core/anndata.py:1228: FutureWarning:

The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object.

... storing 'louvain2_rna' as categorical
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/_core/anndata.py:1228: FutureWarning:

The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object.

... storing 'filter' as categorical
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/_core/anndata.py:1228: FutureWarning:

The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object.

... storing 'omic_batch' as categorical
/home/icb/atai.dobrynin/miniconda3/envs/test2/lib/python3.7/site-packages/anndata/_core/anndata.py:1228: FutureWarning:

The `inplace` parameter in pandas.Categorical.reorder_categories is deprecated and will be removed in a future version. Reordering categories will always return a new Categorical object.

... storing 'group' as categorical
<Figure size 1088x320 with 0 Axes>
seurat_cca
bindsc
scalex
seurat_rpca
In [ ]: